Wisconsin Prognosis

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast)

[+++]

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 0.09072 1.03 1.09 1.17 0.598 0.237
V27 0.00015 1.00 1.00 1.00 0.608 0.727
V26 0.00156 1.00 1.00 1.00 0.593 0.634
V35 0.01034 1.00 1.01 1.02 0.727 0.608
V34 0.01227 1.00 1.01 1.02 0.634 0.593
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.500 0.609 0.0619 0.437 2.87
V27 0.613 0.608 0.641 0.597 0.0562 0.447 2.72
V26 0.608 0.598 0.618 0.616 0.0553 0.350 2.57
V35 0.613 0.641 0.608 0.597 0.0288 0.551 2.27
V34 0.608 0.618 0.598 0.616 0.0247 0.411 2.19
  z.NRI Delta.AUC Frequency
V24 2.67 0.10914 1
V27 2.72 -0.04436 1
V26 2.10 -0.00191 1
V35 3.41 -0.01160 1
V34 2.47 0.01763 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.430 0.254 0.155 0.1499 0.500
RR 2.183 2.609 4.322 17.2193 2.142
SEN 0.261 0.739 0.978 1.0000 0.152
SPE 0.899 0.547 0.108 0.0473 0.946
BACC 0.580 0.643 0.543 0.5236 0.549
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.836 0.612 1.12 0.251
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1 1 0.951 1.05
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.783 0.783 0.778 0.788
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.689 0.692 0.61 0.765
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.648 0.556 0.739
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.239 0.126 0.388
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.433
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
2.18 1.29 3.7
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 9.981928 on 1 degrees of freedom, p = 0.001581
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 168 35 41.4 0.989 9.98
class=1 26 11 4.6 8.896 9.98

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBreast,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.246 0.762 40.9

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBreast$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Calibrated Train: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.349 0.200 0.120 0.1164 0.506
RR 2.183 2.609 4.322 17.2193 2.544
SEN 0.261 0.739 0.978 1.0000 0.087
SPE 0.899 0.547 0.108 0.0473 0.980
BACC 0.580 0.643 0.543 0.5236 0.533
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.876 0.641 1.17 0.408
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.05 1.05 1 1.11
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.959 0.959 0.951 0.967
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.689 0.687 0.605 0.76
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.648 0.556 0.739
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.239 0.126 0.388
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.351
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
2.18 1.29 3.7
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 9.981928 on 1 degrees of freedom, p = 0.001581
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 168 35 41.4 0.989 9.98
class=1 26 11 4.6 8.896 9.98

Cross-Validation

Here we use the estimated h0 and timeinterval from the full set

rcv <- randomCV(theData=dataBreast,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.9,
                repetitions=100,
                classSamplingType = "Pro"
         )

.[+++].[++].[+++].[+++++].[+++].[++].[–].[+++++].[+++].[++++]10 Tested: 121 Avg. Selected: 3.6 Min Tests: 1 Max Tests: 5 Mean Tests: 1.652893 . MAD: 0.4767596

.[-+].[++++].[+++].[+++].[+].[+++++].[++++].[+].[+++].[++++]20 Tested: 165 Avg. Selected: 3.5 Min Tests: 1 Max Tests: 10 Mean Tests: 2.424242 . MAD: 0.489996

.[++++].[+++++++].[+++++].[+++++].[+++++].[+].[++++++++].[+++++].[+++++].[++++++]30 Tested: 184 Avg. Selected: 4.166667 Min Tests: 1 Max Tests: 10 Mean Tests: 3.26087 . MAD: 0.485977

.[++++++++].[+++].[++++++].[++++].[++++].[+].[++].[+++].[++++].[+++]40 Tested: 191 Avg. Selected: 4.15 Min Tests: 1 Max Tests: 11 Mean Tests: 4.188482 . MAD: 0.486274

.[+++].[++++].[+++++].[++++++].[++++].[++++++].[+++].[-++++].[++++].[–]50 Tested: 193 Avg. Selected: 4.18 Min Tests: 1 Max Tests: 12 Mean Tests: 5.181347 . MAD: 0.4853473

.[++++++].[+++].[+++].[++++++].[+++++++].[++++].[++++++].[+++++].[++++++].[++++++]60 Tested: 194 Avg. Selected: 4.433333 Min Tests: 2 Max Tests: 14 Mean Tests: 6.185567 . MAD: 0.4850705

.[++++].[++++].[++++].[+++].[++++++].[+].[++++].[+].[++++].[+]70 Tested: 194 Avg. Selected: 4.314286 Min Tests: 2 Max Tests: 15 Mean Tests: 7.216495 . MAD: 0.4840154

.[+++++].[++++++].[++++].[++].[+++++].[++++].[+++++++].[+++].[+].[++++]80 Tested: 194 Avg. Selected: 4.325 Min Tests: 3 Max Tests: 17 Mean Tests: 8.247423 . MAD: 0.4835995

.[+].[+++++].[++++].[+++++].[–].[++++].[++++].[+++].[++++++].[+++]90 Tested: 194 Avg. Selected: 4.266667 Min Tests: 3 Max Tests: 19 Mean Tests: 9.278351 . MAD: 0.4842785

.[+++++].[++].[+++].[++++].[++++].[+++].[+++++].[+++].[+++++].[+++]100 Tested: 194 Avg. Selected: 4.27 Min Tests: 4 Max Tests: 20 Mean Tests: 10.30928 . MAD: 0.4835437

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.35 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.348 0.205 0.132 0.1173 0.4980
RR 1.795 2.286 2.389 12.1693 1.8638
SEN 0.217 0.696 0.957 1.0000 0.0652
SPE 0.892 0.561 0.115 0.0338 0.9730
BACC 0.555 0.628 0.536 0.5169 0.5191
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.875 0.64 1.17 0.408
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.01 1.12
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.908 0.908 0.893 0.923
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.644 0.643 0.56 0.718
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.598 0.504 0.693
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.217 0.109 0.364
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.351
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.8 1.02 3.16
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 7.275205 on 1 degrees of freedom, p = 0.006991
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 169 36 41.44 0.714 7.28
class=1 25 10 4.56 6.494 7.28

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.326 1.01 41.3
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

### Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.356 0.207 0.133 0.1186 0.5020
RR 1.878 2.286 2.389 12.1693 1.8638
SEN 0.217 0.696 0.957 1.0000 0.0652
SPE 0.899 0.561 0.115 0.0338 0.9730
BACC 0.558 0.628 0.536 0.5169 0.5191
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.873 0.639 1.16 0.408
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.01 1.11
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.9 0.9 0.884 0.915
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.644 0.644 0.56 0.719
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.598 0.504 0.693
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.196 0.0936 0.339
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.359
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.72 0.955 3.11
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 5.384639 on 1 degrees of freedom, p = 0.020315
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 170 37 41.61 0.51 5.38
class=1 24 9 4.39 4.83 5.38